package de.lmu.ifi.dbs.elki.datasource.parser;

import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.VectorTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
import de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource;
import de.lmu.ifi.dbs.elki.datasource.parser.AbstractStreamingParser;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray;
import de.lmu.ifi.dbs.elki.utilities.datastructures.hash.Unique;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Streaming parser that splits every input line into numerical attributes
 * (collected into a vector produced by the configured factory) and labels
 * (collected into a {@link LabelList}).
 *
 * <p>A token becomes a numerical attribute when its column is not configured
 * as a label column, it is not quoted, and it parses as a double. Every other
 * non-empty token becomes a label. If the first line of the input yields no
 * numerical attribute at all, it is remembered as a row of column names and
 * does not produce an object.
 *
 * @param <V> vector type produced by this parser
 */
@Alias({"de.lmu.ifi.dbs.elki.parser.NumberVectorLabelParser", "de.lmu.ifi.dbs.elki.parser.RealVectorLabelParser"})
public class NumberVectorLabelParser<V extends NumberVector> extends AbstractStreamingParser {
    /** Class logger. */
    private static final Logging LOG = Logging.getLogger(NumberVectorLabelParser.class);

    /** Bit set of the columns that are always treated as labels; may be {@code null}. */
    private long[] labelIndices;

    /** Factory used to build the vectors. */
    protected NumberVector.Factory<V> factory;

    /** Smallest vector dimensionality seen so far in the current stream. */
    protected int mindim;

    /** Largest vector dimensionality seen so far in the current stream. */
    protected int maxdim;

    /** Current bundle metadata; {@code null} until the first object was parsed. */
    protected BundleMeta meta;

    /** Column names taken from a leading header line, or {@code null}. */
    protected List<String> columnnames;

    /** Whether any label was encountered in the current stream. */
    protected boolean haslabels;

    /** Vector of the most recently parsed line. */
    protected V curvec;

    /** Labels of the most recently parsed line. */
    protected LabelList curlbl;

    /** Reusable buffer for the numerical attributes of the line being parsed. */
    protected DoubleArray attributes;

    /** Reusable buffer for the labels of the line being parsed. */
    final ArrayList<String> labels;

    /** Pool used to share identical label strings. */
    Unique<String> unique;

    /** Event queued for delivery on the next {@link #nextEvent()} call, or {@code null}. */
    BundleStreamSource.Event nextevent;

    /**
     * Parameterization class.
     *
     * @param <V> vector type produced by the configured parser
     */
    public static class Parameterizer<V extends NumberVector> extends AbstractStreamingParser.Parameterizer {
        /** Option for the columns that are to be treated as labels. */
        public static final OptionID LABEL_INDICES_ID = new OptionID("parser.labelIndices", "A comma separated list of the indices of labels (may be numeric), counting whitespace separated entries in a line starting with 0. The corresponding entries will be treated as a label.");

        /** Option for the vector factory to use. */
        public static final OptionID VECTOR_TYPE_ID = new OptionID("parser.vector-type", "The type of vectors to create for numerical attributes.");

        /** Configured label columns as a bit set; stays {@code null} when the option is not given. */
        protected long[] labelIndices;

        /** Configured vector factory. */
        protected NumberVector.Factory<V> factory;

        /**
         * Reads the options of the parent class, then the label indices and
         * the vector factory.
         *
         * @param parameterization parameter source
         */
        @Override
        public void makeOptions(Parameterization parameterization) {
            super.makeOptions(parameterization);
            getLabelIndices(parameterization);
            getFactory(parameterization);
        }

        /**
         * Reads the vector factory option, which defaults to
         * {@link DoubleVector.Factory}.
         *
         * @param parameterization parameter source
         */
        protected void getFactory(Parameterization parameterization) {
            ObjectParameter<NumberVector.Factory<V>> factoryParam = new ObjectParameter<>(VECTOR_TYPE_ID, NumberVector.Factory.class, DoubleVector.Factory.class);
            if (parameterization.grab(factoryParam)) {
                this.factory = factoryParam.instantiateClass(parameterization);
            }
        }

        /**
         * Reads the optional list of label column indices.
         *
         * @param parameterization parameter source
         */
        protected void getLabelIndices(Parameterization parameterization) {
            IntListParameter indicesParam = new IntListParameter(LABEL_INDICES_ID, true);
            if (parameterization.grab(indicesParam)) {
                this.labelIndices = indicesParam.getValueAsBitSet();
            }
        }

        /**
         * Builds the parser from the collected options.
         *
         * @return configured parser instance
         */
        @Override
        public NumberVectorLabelParser<V> makeInstance() {
            return new NumberVectorLabelParser<>(this.format, this.labelIndices, this.factory);
        }
    }

    /**
     * Constructor.
     *
     * @param format input format, passed on to the parent class
     * @param labelIndices bit set of columns that are always labels, or {@code null} for none
     * @param factory factory used to build the vectors
     */
    public NumberVectorLabelParser(CSVReaderFormat format, long[] labelIndices, NumberVector.Factory<V> factory) {
        super(format);
        this.meta = null;
        this.columnnames = null;
        this.haslabels = false;
        this.curvec = null;
        this.curlbl = null;
        this.attributes = new DoubleArray(11);
        this.labels = new ArrayList<>();
        this.unique = new Unique<>();
        this.nextevent = null;
        this.labelIndices = labelIndices;
        this.factory = factory;
    }

    /**
     * Constructor using {@link CSVReaderFormat#DEFAULT_FORMAT} and no fixed
     * label columns.
     *
     * @param factory factory used to build the vectors
     */
    public NumberVectorLabelParser(NumberVector.Factory<V> factory) {
        this(CSVReaderFormat.DEFAULT_FORMAT, null, factory);
    }

    /**
     * Constructor that builds the {@link CSVReaderFormat} from its three
     * components.
     *
     * <p>NOTE(review): the parameter names below assume the order column
     * separator, quote characters, comment pattern; confirm against
     * {@code CSVReaderFormat}.
     *
     * @param colSep first pattern handed to {@code CSVReaderFormat}
     * @param quoteChars string handed to {@code CSVReaderFormat}
     * @param comment second pattern handed to {@code CSVReaderFormat}
     * @param labelIndices bit set of columns that are always labels, or {@code null} for none
     * @param factory factory used to build the vectors
     */
    public NumberVectorLabelParser(Pattern colSep, String quoteChars, Pattern comment, long[] labelIndices, NumberVector.Factory<V> factory) {
        this(new CSVReaderFormat(colSep, quoteChars, comment), labelIndices, factory);
    }

    /**
     * Tests whether a column was configured to always be a label.
     *
     * @param i column index
     * @return {@code true} if label indices were configured and bit {@code i} is set
     */
    public boolean isLabelColumn(int i) {
        return this.labelIndices != null && BitsUtil.get(this.labelIndices, i);
    }

    /**
     * Starts parsing a new stream and resets the per-stream statistics
     * (dimensionality range, column names, label flag, queued event).
     *
     * @param inputStream stream to read from
     */
    @Override
    public void initStream(InputStream inputStream) {
        super.initStream(inputStream);
        // Inverted range (min > max) marks "no vector seen yet".
        this.mindim = Integer.MAX_VALUE;
        this.maxdim = 0;
        this.columnnames = null;
        this.haslabels = false;
        this.nextevent = null;
    }

    /**
     * @return the current bundle metadata, {@code null} before the first object was parsed
     */
    @Override
    public BundleMeta getMeta() {
        return this.meta;
    }

    /**
     * Advances the stream to the next event.
     *
     * <p>A queued event is delivered first. Otherwise lines are read until one
     * yields an object. When the dimensionality range changed, or when labels
     * show up while the metadata only has a single column, the metadata is
     * rebuilt and {@code META_CHANGED} is returned, with {@code NEXT_OBJECT}
     * queued for the following call.
     *
     * @return the next event, {@code END_OF_STREAM} once the reader has no more lines
     * @throws IllegalArgumentException if reading a line fails with an I/O error
     */
    public BundleStreamSource.Event nextEvent() {
        if (this.nextevent != null) {
            BundleStreamSource.Event pending = this.nextevent;
            this.nextevent = null;
            return pending;
        }
        do {
            try {
                if (!this.reader.nextLineExceptComments()) {
                    return BundleStreamSource.Event.END_OF_STREAM;
                }
            } catch (IOException e) {
                // Keep the I/O failure as cause so that it is not lost for diagnosis.
                throw new IllegalArgumentException("Error while parsing line " + this.reader.getLineNumber() + ".", e);
            }
        } while (!parseLineInternal());

        final int dim = this.curvec.getDimensionality();
        final boolean rangeChanged = dim > this.maxdim || this.mindim > dim;
        if (rangeChanged) {
            this.mindim = Math.min(this.mindim, dim);
            this.maxdim = Math.max(this.maxdim, dim);
        }
        // Labels appeared although the metadata so far only announces the vector column.
        final boolean labelsAppeared = this.curlbl != null && this.meta != null && this.haslabels && this.meta.size() == 1;
        if (!rangeChanged && !labelsAppeared) {
            return BundleStreamSource.Event.NEXT_OBJECT;
        }
        buildMeta();
        this.nextevent = BundleStreamSource.Event.NEXT_OBJECT;
        return BundleStreamSource.Event.META_CHANGED;
    }

    /**
     * Cleans up the parent class state and empties the label string pool.
     */
    @Override
    public void cleanup() {
        super.cleanup();
        this.unique.clear();
    }

    /**
     * Rebuilds the metadata from the current dimensionality range: one vector
     * column, followed by a label list column if labels were seen.
     */
    protected void buildMeta() {
        if (this.haslabels) {
            this.meta = new BundleMeta(2);
            this.meta.add(getTypeInformation(this.mindim, this.maxdim));
            this.meta.add(TypeUtil.LABELLIST);
        } else {
            this.meta = new BundleMeta(1);
            this.meta.add(getTypeInformation(this.mindim, this.maxdim));
        }
    }

    /**
     * Returns the data of the current object.
     *
     * @param i column number; 0 yields the vector, any other accepted value the labels
     * @return current vector or current label list
     * @throws ArrayIndexOutOfBoundsException if {@code i} is greater than 1
     */
    @Override
    public Object data(int i) {
        if (i > 1) {
            throw new ArrayIndexOutOfBoundsException();
        }
        return i == 0 ? this.curvec : this.curlbl;
    }

    /**
     * Parses the line the tokenizer currently points to and stores the result
     * in {@link #curvec} and {@link #curlbl}.
     *
     * @return {@code true} if an object was produced, {@code false} if the line
     *         was consumed as a row of column names
     */
    protected boolean parseLineInternal() {
        // The tokenizer is not initialized here; this relies on the reader having loaded a line.
        for (int column = 0; this.tokenizer.valid(); this.tokenizer.advance(), column++) {
            if (!isLabelColumn(column) && !this.tokenizer.isQuoted()) {
                try {
                    this.attributes.add(this.tokenizer.getDouble());
                    // Numeric token consumed; it must not also be recorded as a label.
                    continue;
                } catch (NumberFormatException ignored) {
                    // Not a number: fall through and keep the same token as a label.
                }
            }
            String label = this.tokenizer.getStrippedSubstring();
            if (label.length() > 0) {
                this.haslabels = true;
                this.labels.add(this.unique.addOrGet(label));
            }
        }
        // A first line without any number is taken as a header with column names.
        if (this.reader.getLineNumber() == 1 && this.attributes.size == 0) {
            this.columnnames = new ArrayList<>(this.labels);
            this.haslabels = false;
            this.curvec = null;
            this.curlbl = null;
            // Column names must not leak into the labels of the first data row.
            this.labels.clear();
            return false;
        }
        // Hand the result over via the fields, then reset the reusable buffers.
        this.curvec = createVector();
        this.curlbl = LabelList.make(this.labels);
        this.attributes.clear();
        this.labels.clear();
        return true;
    }

    /**
     * Builds a vector from the attribute buffer, which is passed to the
     * factory both as the data and as its own array adapter.
     *
     * @return new vector
     */
    public V createVector() {
        return this.factory.newNumberVector(this.attributes, this.attributes);
    }

    /**
     * Derives the type information for the vector column.
     *
     * @param minDim minimum dimensionality
     * @param maxDim maximum dimensionality
     * @return vector field type if both bounds agree, otherwise a variable-length vector type
     * @throws AbortException if {@code minDim > maxDim}, i.e. no vector was read
     */
    SimpleTypeInformation<V> getTypeInformation(int minDim, int maxDim) {
        if (minDim > maxDim) {
            throw new AbortException("No vectors were read from the input file - cannot determine vector data type.");
        }
        if (minDim != maxDim) {
            // Variable dimensionality: no fixed vector field.
            return new VectorTypeInformation<>(this.factory, this.factory.getDefaultSerializer(), minDim, maxDim);
        }
        String[] names = null;
        if (this.columnnames != null && minDim <= this.columnnames.size()) {
            names = new String[minDim];
            int filled = 0;
            for (int column = 0; column < minDim; column++) {
                if (!isLabelColumn(column)) {
                    names[filled++] = this.columnnames.get(column);
                }
            }
            // NOTE(review): only the first minDim columns are inspected, so any label
            // column among them makes the names get dropped here.
            if (filled != minDim) {
                names = null;
            }
        }
        return new VectorFieldTypeInformation<>(this.factory, minDim, names);
    }

    /**
     * @return the class logger
     */
    @Override
    protected Logging getLogger() {
        return LOG;
    }
}
